\documentclass[a4paper]{article}
\usepackage{xeCJK}
\setCJKmainfont{WenQuanYi Micro Hei}
\usepackage[affil-it]{authblk}
\usepackage[backend=bibtex,style=numeric]{biblatex}
\usepackage{graphicx}
\usepackage{geometry}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{amssymb}
\geometry{margin=1.5cm, vmargin={0pt,1cm}}
\setlength{\topmargin}{-1cm}
\setlength{\paperheight}{29.7cm}
\setlength{\textheight}{25.3cm}

\addbibresource{citation.bib}

\begin{document}
% =================================================
\title{Numerical Analysis Homework \#5}

\author{王劼 Wang Jie 3220100105
  \thanks{Electronic address: \texttt{2645443470@qq.com}}}
\affil{Mathematics, Zhejiang University}


\date{Due time: \today}

\maketitle

\begin{abstract}
    Theoretical homework 5.
\end{abstract}





% ============================================
\section*{Theoretical Homework}

Complete the theoretical homework for Section 5.6; the programming homework is optional. Not submitting it will not affect the regular grade, but a submission may be counted toward offsetting the scores of previous homework assignments. \cite{wangheyu2024}

\subsection*{Question 5.6.1 I}

\textbf{Answer:}\\

Let \( C[a, b] \) be the space of complex-valued continuous functions on the interval \( [a, b] \), and define the inner product and norm as follows:

\[
\langle u, v \rangle := \int_a^b \rho(t) u(t) \overline{v(t)} \, dt, \quad \| u \| := \left( \int_a^b \rho(t) |u(t)|^2 \, dt \right)^{1/2},
\]
where \( \rho(t) > 0 \) and \( \rho(t) \in L[a, b] \) (i.e., \( \rho(t) \) is integrable).

We aim to show that \( C[a, b] \) with the given inner product and norm is both an inner product space and a normed vector space.

\section*{Inner Product Space Properties}

We need to verify that the inner product satisfies the following three properties:

\subsection*{Linearity in the First Argument}
For \( u, v, w \in C[a, b] \) and \( \alpha \in \mathbb{C} \), we need to show the inner product is linear in the first argument:

\[
\langle u + w, v \rangle = \langle u, v \rangle + \langle w, v \rangle, \quad \langle \alpha u, v \rangle = \alpha \langle u, v \rangle.
\]

\textbf{Proof:}

First, calculate \( \langle u + w, v \rangle \):

\[
\langle u + w, v \rangle = \int_a^b \rho(t) (u(t) + w(t)) \overline{v(t)} \, dt
= \int_a^b \rho(t) u(t) \overline{v(t)} \, dt + \int_a^b \rho(t) w(t) \overline{v(t)} \, dt
= \langle u, v \rangle + \langle w, v \rangle.
\]

For a scalar \( \alpha \in \mathbb{C} \), we compute \( \langle \alpha u, v \rangle \):

\[
\langle \alpha u, v \rangle = \int_a^b \rho(t) (\alpha u(t)) \overline{v(t)} \, dt = \alpha \int_a^b \rho(t) u(t) \overline{v(t)} \, dt = \alpha \langle u, v \rangle.
\]

Thus, linearity in the first argument holds.

\subsection*{Conjugate Symmetry}
We need to show that for any \( u, v \in C[a, b] \), the inner product satisfies conjugate symmetry:

\[
\langle u, v \rangle = \overline{\langle v, u \rangle}.
\]

\textbf{Proof:}

We compute \( \langle u, v \rangle \) and \( \langle v, u \rangle \):

\[
\langle u, v \rangle = \int_a^b \rho(t) u(t) \overline{v(t)} \, dt,
\]
\[
\langle v, u \rangle = \int_a^b \rho(t) v(t) \overline{u(t)} \, dt.
\]

Taking the complex conjugate of \( \langle v, u \rangle \) and using that \( \rho(t) \) is real-valued, we obtain:

\[
\overline{\langle v, u \rangle} = \overline{\int_a^b \rho(t) v(t) \overline{u(t)} \, dt} = \int_a^b \rho(t) u(t) \overline{v(t)} \, dt = \langle u, v \rangle.
\]

Thus, the inner product satisfies conjugate symmetry.

\subsection*{Positive Definiteness}
We need to show that the inner product satisfies positive definiteness, i.e.,

\[
\langle u, u \rangle = \int_a^b \rho(t) |u(t)|^2 \, dt \geq 0, \quad \langle u, u \rangle = 0 \text{ if and only if } u = 0.
\]

\textbf{Proof:}

First, since \( \rho(t) > 0 \) and \( |u(t)|^2 \geq 0 \), it follows that \( \langle u, u \rangle \geq 0 \). Moreover, suppose \( \langle u, u \rangle = 0 \) but \( u(t_0) \neq 0 \) for some \( t_0 \in [a, b] \). By continuity, \( |u(t)|^2 \) is bounded below by a positive constant on a subinterval containing \( t_0 \), and since \( \rho(t) > 0 \), the integral over that subinterval is strictly positive, a contradiction. Hence \( \langle u, u \rangle = 0 \) implies \( u(t) = 0 \) for all \( t \in [a, b] \).

Thus, the inner product satisfies positive definiteness.

\section*{Normed Vector Space Properties}

We now show that \( C[a, b] \) is a normed vector space by verifying that the norm satisfies the following three properties:

\subsection*{Non-negativity}
We need to show that the norm is non-negative, and that \( \| u \| = 0 \) if and only if \( u = 0 \).

\textbf{Proof:}

By the definition of the norm:

\[
\| u \| = \left( \int_a^b \rho(t) |u(t)|^2 \, dt \right)^{1/2}.
\]

Since \( \rho(t) > 0 \) and \( |u(t)|^2 \geq 0 \), it follows that \( \| u \| \geq 0 \). Moreover, \( \| u \| = 0 \) if and only if \( \langle u, u \rangle = 0 \), which by the positive definiteness argument above implies \( u = 0 \).

Thus, the norm satisfies non-negativity.

\subsection*{Triangle Inequality}
We need to show that for any \( u, v \in C[a, b] \), the triangle inequality holds:

\[
\| u + v \| \leq \| u \| + \| v \|.
\]

\textbf{Proof:}

Starting from the definition of the norm, we have:

\[
\| u + v \|^2 = \int_a^b \rho(t) |u(t) + v(t)|^2 \, dt.
\]

Expanding the square inside the integral:

\[
\| u + v \|^2 = \int_a^b \rho(t) \left( |u(t)|^2 + 2 \Re(u(t) \overline{v(t)}) + |v(t)|^2 \right) \, dt.
\]

This splits into three terms:

\[
\| u + v \|^2 = \int_a^b \rho(t) |u(t)|^2 \, dt + 2 \Re \int_a^b \rho(t) u(t) \overline{v(t)} \, dt + \int_a^b \rho(t) |v(t)|^2 \, dt.
\]

By the Cauchy-Schwarz inequality for integrals:

\[
\left| \int_a^b \rho(t) u(t) \overline{v(t)} \, dt \right| \leq \left( \int_a^b \rho(t) |u(t)|^2 \, dt \right)^{1/2} \left( \int_a^b \rho(t) |v(t)|^2 \, dt \right)^{1/2},
\]

and since \( 2 \Re \langle u, v \rangle \leq 2 |\langle u, v \rangle| \leq 2 \| u \| \| v \| \), we obtain:

\[
\| u + v \|^2 \leq \| u \|^2 + 2 \| u \| \| v \| + \| v \|^2 = (\| u \| + \| v \|)^2.
\]

Thus, the triangle inequality holds: \( \| u + v \| \leq \| u \| + \| v \| \).

\subsection*{Homogeneity}
We need to show that for any scalar \( \alpha \in \mathbb{C} \) and \( u \in C[a, b] \), we have:

\[
\| \alpha u \| = |\alpha| \| u \|.
\]

\textbf{Proof:}

We compute \( \| \alpha u \|^2 \):

\[
\| \alpha u \|^2 = \int_a^b \rho(t) |\alpha u(t)|^2 \, dt = |\alpha|^2 \int_a^b \rho(t) |u(t)|^2 \, dt = |\alpha|^2 \| u \|^2.
\]

Thus, \( \| \alpha u \| = |\alpha| \| u \| \).

\textbf{Conclusion:}

Hence, \( C[a, b] \) with the given inner product and norm is both an inner product space and a normed vector space over \( \mathbb{C} \).

\subsection*{Question 5.6.1 II}

\textbf{Answer:}\\

\subsection*{Part (a): Orthogonality of Chebyshev Polynomials}

The Chebyshev polynomials of the first kind, \( T_n(x) \), are defined recursively by:

\[
T_0(x) = 1, \quad T_1(x) = x, \quad T_n(x) = 2x T_{n-1}(x) - T_{n-2}(x), \quad n \geq 2.
\]

We are tasked with showing that the Chebyshev polynomials \( T_n(x) \) are orthogonal on \( [-1, 1] \) with respect to the inner product:

\[
\langle f, g \rangle = \int_{-1}^{1} f(x) g(x) \frac{1}{\sqrt{1 - x^2}} \, dx.
\]

Specifically, we need to show that:

\[
\langle T_n, T_m \rangle = \int_{-1}^{1} T_n(x) T_m(x) \frac{1}{\sqrt{1 - x^2}} \, dx = 0 \quad \text{for} \quad n \neq m.
\]

\subsubsection*{Step 1: Trigonometric Representation of Chebyshev Polynomials}

The Chebyshev polynomials \( T_n(x) \) can be expressed in terms of trigonometric functions:

\[
T_n(x) = \cos(n \theta), \quad \text{where} \quad x = \cos(\theta).
\]

Substituting \( x = \cos\theta \), so that \( dx = -\sin\theta \, d\theta \) and \( \sqrt{1 - x^2} = \sin\theta \) for \( \theta \in [0, \pi] \), the limits \( x = -1 \) and \( x = 1 \) correspond to \( \theta = \pi \) and \( \theta = 0 \), and the inner product becomes:

\[
\langle T_n, T_m \rangle = \int_{-1}^{1} T_n(x) T_m(x) \frac{1}{\sqrt{1 - x^2}} \, dx = \int_0^\pi \cos(n \theta) \cos(m \theta) \, d\theta.
\]

\subsubsection*{Step 2: Evaluate the Integral}

Using the standard result for the integral of the product of cosines:

\[
\int_0^\pi \cos(n \theta) \cos(m \theta) \, d\theta = 
\begin{cases} 
0, & \text{if } n \neq m, \\
\pi, & \text{if } n = m = 0, \\
\frac{\pi}{2}, & \text{if } n = m \geq 1.
\end{cases}
\]
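
This follows from the product-to-sum identity \( \cos(n\theta)\cos(m\theta) = \tfrac{1}{2}\bigl( \cos((n - m)\theta) + \cos((n + m)\theta) \bigr) \) together with

\[
\int_0^\pi \cos(k\theta) \, d\theta = \left[ \frac{\sin(k\theta)}{k} \right]_0^\pi = 0 \quad \text{for any integer } k \neq 0.
\]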

This proves that the Chebyshev polynomials \( T_n(x) \) are orthogonal on \( [-1, 1] \) with respect to the inner product defined by the weight function \( \rho(x) = \frac{1}{\sqrt{1 - x^2}} \).

\subsection*{Part (b): Normalization of the First Three Chebyshev Polynomials}

To normalize the first three Chebyshev polynomials, we divide each polynomial by the square root of its inner product with itself. From part (a), \( \langle T_0, T_0 \rangle = \pi \) and \( \langle T_n, T_n \rangle = \frac{\pi}{2} \) for \( n \geq 1 \), so the normalized Chebyshev polynomials \( \hat{T}_n(x) \) are given by:

\[
\hat{T}_n(x) = 
\begin{cases} 
\frac{T_n(x)}{\sqrt{\pi}}, & \text{if } n = 0, \\
\frac{\sqrt{2}T_n(x)}{\sqrt{\pi}}, & \text{if } n \geq 1.
\end{cases}
\]

Thus, the first three normalized Chebyshev polynomials are:

\[
\hat{T}_0(x) = \frac{T_0(x)}{\sqrt{\pi}} = \frac{1}{\sqrt{\pi}},
\]
\[
\hat{T}_1(x) = \sqrt{\frac{2}{\pi}}\, T_1(x) = \frac{\sqrt{2}\,x}{\sqrt{\pi}},
\]
\[
\hat{T}_2(x) = \sqrt{\frac{2}{\pi}}\, T_2(x) = \frac{\sqrt{2}\,(2x^2 - 1)}{\sqrt{\pi}}.
\]

These polynomials \( \hat{T}_n(x) \) for \( n = 0, 1, 2 \) form an orthonormal system on \( [-1, 1] \) with respect to the inner product defined by the weight function \( \rho(x) = \frac{1}{\sqrt{1 - x^2}} \).
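
As a quick check of the normalization, for \( n = 1 \):

\[
\langle \hat{T}_1, \hat{T}_1 \rangle = \frac{2}{\pi} \int_{-1}^{1} \frac{x^2}{\sqrt{1 - x^2}} \, dx = \frac{2}{\pi} \cdot \frac{\pi}{2} = 1.
\]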

\subsection*{Question 5.6.1 III}

\textbf{Answer:}\\

\subsection*{Part (a): Approximation using Fourier Expansion}

The truncated Fourier expansion of a function \( f(x) \) with respect to the orthonormal Chebyshev system and the weight function \( \rho(x) = \frac{1}{\sqrt{1 - x^2}} \) is given by:

\[
f(x) \approx \sum_{n=0}^{2} c_n \hat{T}_n(x),
\]

where \( \hat{T}_n(x) \) are as follows:

\[
\hat{T}_0(x) = \frac{1}{\sqrt{\pi}},
\]
\[
\hat{T}_1(x) = \frac{\sqrt{2}x}{\sqrt{\pi}},
\]
\[
\hat{T}_2(x) = \frac{\sqrt{2}(2x^2 - 1)}{\sqrt{\pi}}.
\]

The Fourier coefficients \( c_n \) are:

\[
c_n = \langle f, \hat{T}_n \rangle = \int_{-1}^{1} f(x) \hat{T}_n(x) \rho(x) \, dx.
\]

For our case, we approximate \( y(x) = \sqrt{1 - x^2} \) using the Fourier coefficients for \( n = 0, 1, 2 \).

1. For \( n = 0 \), \( \hat{T}_0(x) = \frac{1}{\sqrt{\pi}} \):

\[
c_0 = \langle \sqrt{1 - x^2}, \frac{1}{\sqrt{\pi}} \rangle = \frac{2}{\sqrt{\pi}}.
\]

2. For \( n = 1 \), \( \hat{T}_1(x) = \frac{\sqrt{2}x}{\sqrt{\pi}} \):

\[
c_1 = \langle \sqrt{1 - x^2}, \frac{\sqrt{2}x}{\sqrt{\pi}} \rangle = 0.
\]

3. For \( n = 2 \), \( \hat{T}_2(x) = \frac{\sqrt{2}(2x^2 - 1)}{\sqrt{\pi}} \):

\[
c_2 = \langle \sqrt{1 - x^2}, \frac{\sqrt{2}(2x^2 - 1)}{\sqrt{\pi}} \rangle = -\frac{2\sqrt{2}}{3\sqrt{\pi}}.
\]
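
For reference, these values follow because the weight \( \rho(x) = \frac{1}{\sqrt{1 - x^2}} \) cancels the factor \( \sqrt{1 - x^2} \) in \( y(x) \):

\[
c_0 = \frac{1}{\sqrt{\pi}} \int_{-1}^{1} \sqrt{1 - x^2} \, \frac{dx}{\sqrt{1 - x^2}} = \frac{2}{\sqrt{\pi}}, \qquad
c_2 = \frac{\sqrt{2}}{\sqrt{\pi}} \int_{-1}^{1} (2x^2 - 1) \, dx = \frac{\sqrt{2}}{\sqrt{\pi}} \left( \frac{4}{3} - 2 \right) = -\frac{2\sqrt{2}}{3\sqrt{\pi}},
\]

while \( c_1 = 0 \) because its integrand is an odd function of \( x \).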

Thus, the truncated Fourier expansion of \( y(x) = \sqrt{1 - x^2} \) up to degree two is:

\[
\hat{y}(x) = \frac{2}{\pi} - \frac{4(2x^2-1)}{3\pi} = \frac{10}{3\pi} - \frac{8x^2}{3\pi}.
\]

\subsection*{Part (b): Approximation using Normal Equations}

In this part, we approximate \( y(x) = \sqrt{1 - x^2} \) using the normal equations. The normal equations come from the condition that the residual \( r(x) = \sqrt{1 - x^2} - p(x) \) is orthogonal to the space spanned by \( 1, x, x^2 \) with respect to the inner product:

\[
\begin{bmatrix}
\langle 1, 1 \rangle & \langle x, 1 \rangle & \langle x^2, 1 \rangle \\
\langle 1, x \rangle & \langle x, x \rangle & \langle x^2, x \rangle \\
\langle 1, x^2 \rangle & \langle x, x^2 \rangle & \langle x^2, x^2 \rangle
\end{bmatrix}
\begin{bmatrix}
a_0 \\
a_1 \\
a_2
\end{bmatrix}
=
\begin{bmatrix}
\langle \sqrt{1 - x^2}, 1 \rangle \\
\langle \sqrt{1 - x^2}, x \rangle \\
\langle \sqrt{1 - x^2}, x^2 \rangle
\end{bmatrix}.
\]

Substituting the known inner products:

\[
\begin{bmatrix}
\pi & 0 & \frac{\pi}{2} \\
0 & \frac{\pi}{2} & 0 \\
\frac{\pi}{2} & 0 & \frac{3\pi}{8}
\end{bmatrix}
\begin{bmatrix}
a_0 \\
a_1 \\
a_2
\end{bmatrix}
=
\begin{bmatrix}
2 \\
0 \\
\frac{2}{3}
\end{bmatrix}.
\]
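
Each entry follows from the substitution \( x = \cos\theta \); for example,

\[
\langle 1, 1 \rangle = \int_{-1}^{1} \frac{dx}{\sqrt{1 - x^2}} = \int_0^\pi d\theta = \pi, \qquad
\langle x^2, x^2 \rangle = \int_0^\pi \cos^4\theta \, d\theta = \frac{3\pi}{8},
\]

while on the right-hand side \( \langle \sqrt{1 - x^2}, x^2 \rangle = \int_{-1}^{1} x^2 \, dx = \frac{2}{3} \).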

We solve this linear system to obtain the values of \( a_0, a_1, a_2 \).
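
The second equation gives \( a_1 = 0 \) directly; halving the first equation and subtracting the third eliminates \( a_0 \):

\[
\left( \frac{\pi}{4} - \frac{3\pi}{8} \right) a_2 = 1 - \frac{2}{3}
\quad \Longrightarrow \quad
a_2 = -\frac{8}{3\pi}, \qquad
a_0 = \frac{1}{\pi} \left( 2 - \frac{\pi}{2} a_2 \right) = \frac{10}{3\pi}.
\]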

\[
a_0 = \frac{10}{3\pi}.
\]
\[
a_1 = 0.
\]
\[
a_2 = -\frac{8}{3\pi}.
\]

Thus, the best quadratic approximation of \( y(x) = \sqrt{1 - x^2} \) obtained from the normal equations is:

\[
\hat{y}(x) = \frac{10}{3\pi} - \frac{8x^2}{3\pi}.
\]

\subsection*{Final Answer}

\begin{itemize}
\item[(a)] The quadratic polynomial approximation using the Fourier expansion is \( \frac{10}{3\pi} - \frac{8x^2}{3\pi} \).
\item[(b)] The quadratic polynomial approximation using the normal equations is \( \frac{10}{3\pi} - \frac{8x^2}{3\pi} \).
\end{itemize}

Both methods yield the same quadratic polynomial, as expected, since both characterize the unique best approximation in the span of \( \{1, x, x^2\} \).

\subsection*{Question 5.6.1 IV}

\textbf{Answer:}\\

Consider the sales record in Example 5.55, where the data points are given as follows:

\begin{table}[ht]
\centering
\begin{tabular}{|c|c|c|c|c|c|c|c|c|c|c|c|c|}
\hline
x & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 11 & 12 \\
\hline
y & 256 & 201 & 159 & 61 & 77 & 40 & 17 & 25 & 103 & 156 & 222 & 345 \\
\hline
\end{tabular}
\caption{Sales Data}
\end{table}

Given the sales record, we are asked to find the best quadratic approximation for the given data using orthonormal polynomials.

\subsection*{Part (a): Constructing Orthonormal Polynomials Using the Gram-Schmidt Process}

We start with the set of polynomials \( \{1, x, x^2\} \) and apply the Gram--Schmidt orthogonalization process to obtain orthonormal polynomials. Since the weight is constant (\( \rho \equiv 1 \)), the discrete inner product is simply:

\[
\langle u, v \rangle = \sum_{i=1}^{12} u(x_i) v(x_i).
\]

\textbf{Step 1: First Orthonormal Polynomial}

The first polynomial is \( p_0(x) = 1 \). We compute the inner product of \( p_0(x) \) with itself:

\[
\langle p_0, p_0 \rangle = \sum_{i=1}^{12} 1 = 12.
\]

Thus, the first orthonormal polynomial is:

\[
\hat{p}_0(x) = \frac{1}{\sqrt{12}}.
\]

\textbf{Step 2: Second Orthonormal Polynomial}

Next, we start with the polynomial \( x \) and subtract its projection onto \( \hat{p}_0(x) \):

\[
p_1(x) = x - \langle x, \hat{p}_0 \rangle \hat{p}_0(x),
\]

where

\[
\langle x, \hat{p}_0 \rangle = \frac{1}{\sqrt{12}} \sum_{i=1}^{12} x_i.
\]

Since \( \sum_{i=1}^{12} x_i = 78 \), we have:

\[
\langle x, \hat{p}_0 \rangle = \frac{78}{\sqrt{12}}.
\]

We get:

\[
p_1(x) = x - \frac{78}{\sqrt{12}} \cdot \frac{1}{\sqrt{12}} = x - \frac{13}{2}.
\]

Thus,

\[
\langle p_1, p_1 \rangle = 143.
\]
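
Indeed, summing over the twelve nodes,

\[
\langle p_1, p_1 \rangle = \sum_{i=1}^{12} \left( x_i - \tfrac{13}{2} \right)^2
= 2 \left( \tfrac{1}{4} + \tfrac{9}{4} + \tfrac{25}{4} + \tfrac{49}{4} + \tfrac{81}{4} + \tfrac{121}{4} \right) = 143.
\]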

We normalize \( p_1(x) \) to get the second orthonormal polynomial \( \hat{p}_1(x) = \frac{x-\frac{13}{2}}{\sqrt{143}}\).

\textbf{Step 3: Third Orthonormal Polynomial}

Now, we start with \( x^2 \) and subtract its projections onto \( \hat{p}_0(x) \) and \( \hat{p}_1(x) \):

\[
p_2(x) = x^2 - \langle x^2, \hat{p}_0 \rangle \hat{p}_0(x) - \langle x^2, \hat{p}_1 \rangle \hat{p}_1(x).
\]

We have:

\[
\langle x^2, \hat{p}_0 \rangle = \frac{650}{\sqrt{12}}.
\]

\[
\langle x^2, \hat{p}_1 \rangle = \frac{1859}{\sqrt{143}}.
\]

We get:

\[
p_2(x) = x^2 - \frac{650}{\sqrt{12}} \hat{p}_0(x) - \frac{1859}{\sqrt{143}} \hat{p}_1(x) = x^2 - 13x + \frac{91}{3}.
\]

Thus,

\[
\langle p_2, p_2 \rangle = \frac{4004}{3} \approx 1334.67.
\]

Normalizing \( p_2(x) \) gives the third orthonormal polynomial:

\[
\hat{p}_2(x) = \frac{p_2(x)}{\sqrt{4004/3}} = \frac{x^2 - 13x + \frac{91}{3}}{\sqrt{4004/3}} \approx 0.02737\,x^2 - 0.35584\,x + 0.83030.
\]

So we obtain the orthonormal polynomials \( \hat{p}_0(x), \hat{p}_1(x), \hat{p}_2(x) \).

\subsection*{Part (b): Best Approximation \( \hat{\phi}(x) = \sum_{i=0}^{2} a_i x^i \)}

We want to find the best quadratic approximation of the form:

\[
\hat{\phi}(x) = a_0 + a_1 x + a_2 x^2.
\]

This approximation should minimize \( \| y - \hat{\phi} \| \). To do this, we need to compute the inner products of the data vector \( y \) with the orthonormal polynomials \( \hat{p}_0(x), \hat{p}_1(x), \hat{p}_2(x) \).

We first construct the inner product matrix \( A \) and the right-hand side vector \( b \):

\[
A = \begin{bmatrix}
\langle \hat{p}_0, \hat{p}_0 \rangle & \langle \hat{p}_0, \hat{p}_1 \rangle & \langle \hat{p}_0, \hat{p}_2 \rangle \\
\langle \hat{p}_1, \hat{p}_0 \rangle & \langle \hat{p}_1, \hat{p}_1 \rangle & \langle \hat{p}_1, \hat{p}_2 \rangle \\
\langle \hat{p}_2, \hat{p}_0 \rangle & \langle \hat{p}_2, \hat{p}_1 \rangle & \langle \hat{p}_2, \hat{p}_2 \rangle
\end{bmatrix},
\]

Since \( \hat{p}_0, \hat{p}_1, \hat{p}_2 \) are orthonormal, the Gram matrix reduces to the identity:

\[
A = I,
\]

and

\[
b = \begin{bmatrix}
\langle y, \hat{p}_0 \rangle \\
\langle y, \hat{p}_1 \rangle \\
\langle y, \hat{p}_2 \rangle
\end{bmatrix}.
\]

\[
b \approx \begin{bmatrix}
479.778 \\
49.255 \\
330.331
\end{bmatrix}.
\]

By solving the linear system \( A \mathbf{a} = b \) (trivially, since \( A = I \)), we obtain the coefficients of \( y \) in the orthonormal basis \( \{\hat{p}_0, \hat{p}_1, \hat{p}_2\} \):

\[
c_0 \approx 479.778, \qquad
c_1 \approx 49.255, \qquad
c_2 \approx 330.331.
\]

Expanding \( \hat{\phi}(x) = c_0 \hat{p}_0(x) + c_1 \hat{p}_1(x) + c_2 \hat{p}_2(x) \) in powers of \( x \), we get:

\[
\hat{\phi}(x) \approx 386.000 - 113.427\,x + 9.042\,x^2,
\]

that is, \( a_0 \approx 386.00 \), \( a_1 \approx -113.43 \), and \( a_2 \approx 9.04 \),

which is the same as the result obtained for the sales record of Example 5.55 in the notes.
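
As a cross-check, the same quadratic can be reproduced numerically. The short script below is only an illustrative sketch (it assumes NumPy is available and is not part of the original solution):

\begin{verbatim}
# Quadratic least-squares fit of the sales data (illustrative check only).
import numpy as np

x = np.arange(1, 13)                         # months 1..12
y = np.array([256, 201, 159, 61, 77, 40,
              17, 25, 103, 156, 222, 345])   # sales

# numpy.polyfit returns coefficients from the highest degree down.
a2, a1, a0 = np.polyfit(x, y, 2)
print(a0, a1, a2)  # approximately 386.0, -113.43, 9.04
\end{verbatim}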

\subsection*{Part (c): Reusability of Calculations}

\textbf{Reusable Parts:}

\begin{itemize}
\item \textbf{Orthonormal Polynomial Construction:} If the \( x_i \)'s remain the same, the construction of the orthonormal polynomials can be reused, because it depends only on the nodes \( x_i \) and not on the values \( y_i \).
\item \textbf{Gram Matrix Construction:} The Gram matrix can be reused because it remains the identity \( I \).
\end{itemize}

\textbf{Non-Reusable Parts:}

\begin{itemize}
\item \textbf{Constant Vector Construction:} Since the values \( y_i \) change, the inner products \( \langle y, \hat{p}_i \rangle \) must be recomputed, so the right-hand-side vector has to be rebuilt each time.
\item \textbf{Coefficient Calculation:} Because the right-hand-side vector changes, the coefficients of the best approximation must also be recomputed.
\end{itemize}

\textbf{Advantages of Orthonormal Polynomials:}

\begin{itemize}
\item \textbf{Reusability:} The orthonormal polynomials can be reused whenever the \( x_i \)'s remain the same, which saves computational effort.
\item \textbf{Numerical Stability:} The Gram--Schmidt approach offers better numerical stability than solving the normal equations directly, since it avoids inverting a potentially ill-conditioned matrix.
\end{itemize}

\subsection*{Question 5.6.1 V}

\textbf{Answer:}\\

\textbf{Proof of Theorem 5.66:}\\

We prove each property using the Singular Value Decomposition (SVD) of the matrix \( A \).

\textbf{Singular Value Decomposition (SVD) of \( A \)}\\
Let \( A \in \mathbb{C}^{m \times n} \) be a matrix. The Singular Value Decomposition of \( A \) is given by:
\[
A = U \Sigma V^T,
\]
where:
\begin{itemize}
\item \( U \in \mathbb{C}^{m \times m} \) is a unitary matrix whose columns are the left singular vectors of \( A \),
\item \( \Sigma \in \mathbb{R}^{m \times n} \) is a (rectangular) diagonal matrix with the non-negative singular values of \( A \) on its diagonal (possibly including zeros),
\item \( V \in \mathbb{C}^{n \times n} \) is a unitary matrix whose columns are the right singular vectors of \( A \),
\item \( V^T \) denotes the conjugate transpose of \( V \) throughout this proof.
\end{itemize}

The pseudo-inverse \( A^+ \) of \( A \) is given by:
\[
A^+ = V \Sigma^+ U^T,
\]
where \( \Sigma^+ \in \mathbb{R}^{n \times m} \) is the pseudo-inverse of \( \Sigma \), obtained by transposing \( \Sigma \) and replacing each non-zero singular value by its reciprocal.
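
For instance, if \( m = 3 \), \( n = 2 \), and only \( \sigma_1 \neq 0 \), then (purely as an illustration)

\[
\Sigma = \begin{bmatrix} \sigma_1 & 0 \\ 0 & 0 \\ 0 & 0 \end{bmatrix}, \qquad
\Sigma^+ = \begin{bmatrix} 1/\sigma_1 & 0 & 0 \\ 0 & 0 & 0 \end{bmatrix},
\]

so \( \Sigma^+ \in \mathbb{R}^{n \times m} \), and \( \Sigma \Sigma^+ \), \( \Sigma^+ \Sigma \) are diagonal projection matrices.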

\textbf{(PDI-1) \( AA^+ A = A \)}\\
We start by calculating \( AA^+ \):
\[
AA^+ = (U \Sigma V^T)(V \Sigma^+ U^T) = U \Sigma \Sigma^+ U^T.
\]
Here \( \Sigma \Sigma^+ \) is the \( m \times m \) diagonal matrix \( \operatorname{diag}(I_r, 0) \), where \( r \) is the number of non-zero singular values, so in particular \( \Sigma \Sigma^+ \Sigma = \Sigma \). Thus \( AA^+ = U \operatorname{diag}(I_r, 0)\, U^T \) is the orthogonal projection onto the column space of \( A \), and
\[
AA^+ A = U \Sigma \Sigma^+ U^T \, U \Sigma V^T = U \Sigma \Sigma^+ \Sigma V^T = U \Sigma V^T = A.
\]
This proves property (PDI-1).

\textbf{(PDI-2) \( A^+ A A^+ = A^+ \)}\\
Next, we compute \( A^+ A A^+ \):
\[
A^+ A A^+ = (V \Sigma^+ U^T)(U \Sigma V^T)(V \Sigma^+ U^T) = V \Sigma^+ \Sigma \Sigma^+ U^T.
\]
Since \( \Sigma^+ \Sigma = \operatorname{diag}(I_r, 0) \) acts as the identity on the components corresponding to the non-zero singular values, we have
\[
\Sigma^+ \Sigma \Sigma^+ = \Sigma^+.
\]
Thus:
\[
A^+ A A^+ = V \Sigma^+ U^T = A^+.
\]
This proves property (PDI-2).

\textbf{(PDI-3) Both \( AA^+ \) and \( A^+ A \) are Hermitian}\\
We now show that \( AA^+ \) and \( A^+ A \) are Hermitian matrices (recall that \( {}^T \) denotes the conjugate transpose here):
\begin{itemize}
\item Since \( AA^+ = U (\Sigma \Sigma^+) U^T \), where \( \Sigma \Sigma^+ = \operatorname{diag}(I_r, 0) \) is real and diagonal, and \( U \) is unitary, we have
  \[
  (AA^+)^T = \left( U (\Sigma \Sigma^+) U^T \right)^T = U (\Sigma \Sigma^+)^T U^T = U (\Sigma \Sigma^+) U^T = AA^+.
  \]
  Thus, \( AA^+ \) is Hermitian.
\item Similarly, \( A^+ A = V (\Sigma^+ \Sigma) V^T \) with \( \Sigma^+ \Sigma \) real and diagonal, and since \( V \) is unitary,
  \[
  (A^+ A)^T = V (\Sigma^+ \Sigma)^T V^T = V (\Sigma^+ \Sigma) V^T = A^+ A.
  \]
  Thus, \( A^+ A \) is also Hermitian.
\end{itemize}

This proves property (PDI-3).

Hence, we have proven all the properties of the pseudo-inverse in Theorem 5.66.

\textbf{Proof of Lemma 5.67:}\\

\textbf{Case 1: \( A \) has linearly independent columns}\\
If \( A \) has linearly independent columns, then the matrix \( A^T A \) is invertible. Using the SVD of \( A \):
\[
A = U \Sigma V^T,
\]
we have:
\[
A^+ = V \Sigma^+ U^T.
\]
Now compute \( A^T A \):
\[
A^T A = (V \Sigma^T U^T)(U \Sigma V^T) = V \Sigma^T \Sigma V^T.
\]
Since \( A \) has rank \( n \), all \( n \) singular values are non-zero, so \( \Sigma^T \Sigma = \operatorname{diag}(\sigma_1^2, \dots, \sigma_n^2) \) is invertible, and hence so is \( A^T A \). Moreover, \( (\Sigma^T \Sigma)^{-1} \Sigma^T = \Sigma^+ \), so
\[
(A^T A)^{-1} A^T = V (\Sigma^T \Sigma)^{-1} V^T \, V \Sigma^T U^T = V (\Sigma^T \Sigma)^{-1} \Sigma^T U^T = V \Sigma^+ U^T = A^+.
\]
In addition, \( A^+ A = V \Sigma^+ \Sigma V^T = V I_n V^T = I \), so \( A^+ \) acts as a left inverse of \( A \).

\textbf{Case 2: \( A \) has linearly independent rows}\\
If \( A \) has linearly independent rows, then the matrix \( A A^T = U \Sigma \Sigma^T U^T \) is invertible, and a computation analogous to Case 1 gives:
\[
A^+ = A^T (A A^T)^{-1}.
\]
Thus, \( A^+ \) acts as a right inverse of \( A \), i.e., \( A A^+ = I \).

This proves Lemma 5.67.
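
As a concrete illustration of Case 1, take \( A = \begin{pmatrix} 1 \\ 1 \end{pmatrix} \), which has linearly independent columns. Then

\[
A^+ = (A^T A)^{-1} A^T = \frac{1}{2} \begin{pmatrix} 1 & 1 \end{pmatrix}, \qquad
A^+ A = 1, \qquad
A A^+ = \frac{1}{2} \begin{pmatrix} 1 & 1 \\ 1 & 1 \end{pmatrix},
\]

so \( A^+ \) is a left inverse of \( A \) but not a right inverse.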

% ===============================================
\section*{Acknowledgement}
None.


\printbibliography

\end{document}